I'm gonna overwrite a lot of this notebook's old content. I changed the way I'm calculating wt, and wanna test that my training worked.



In [1]:

    
from pearce.emulator import OriginalRecipe, ExtraCrispy, SpicyBuffalo
from pearce.mocks import cat_dict
import numpy as np
from os import path



In [2]:

    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()



In [3]:

    
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test//PearceRedMagicXiCosmoFixedNd_Test.hdf5'
em_method = 'gbdt'
#split_method = 'random'



In [4]:

    
a = 1.0
z = 1.0/a - 1.0



In [5]:

    
fixed_params = {'z':z}#, 'r':24.06822623}

np.random.seed(0) emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,\ custom_mean_function = 'linear', downsample_factor=0.02)#, #hyperparams = {'n_estimators': 500, # 'max_depth': 5})



In [6]:

    
#np.random.seed(0)
estimators = 100
emu = SpicyBuffalo(training_file, method = em_method, fixed_params=fixed_params,
                 custom_mean_function = 'linear', downsample_factor = 1.0,
                  hyperparams = {'n_estimators': estimators})#, 'max_depth': depth})









    



/home/users/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/emu.py:287: UserWarning: WARNING: NaN detected. Skipped 21 points in training data.
  warnings.warn('WARNING: NaN detected. Skipped %d points in training data.' % (num_skipped))






    



40000 18

gof = emu.goodness_of_fit(test_file, statistic = 'log_frac') #print gof.mean(), np.median(gof) for g in gof: print g.mean(), np.median(g) print '*'*50



In [7]:

    
for i in xrange(50):    
    params = {}

    for pname in emu.get_param_names():
        if pname == 'r':
            continue
        low, high = emu.get_param_bounds(pname)
        params[pname] = np.random.uniform(low, high)
    pred_y = emu.emulate_wrt_r(params)[0]
    
    plt.plot(emu.scale_bin_centers, pred_y)
    plt.xscale('log')
    plt.show()
    #print pred_y
    #print params



In [8]:

    
for i, (g, r) in enumerate(zip(gof, emu.scale_bin_centers)):
    print r, g.mean(), np.median(g)
    #plt.hist(np.log10(g))
    #plt.show()









    




NameErrorTraceback (most recent call last)
<ipython-input-8-450638e57f5f> in <module>()
----> 1 for i, (g, r) in enumerate(zip(gof, emu.scale_bin_centers)):
      2     print r, g.mean(), np.median(g)
      3     #plt.hist(np.log10(g))
      4     #plt.show()

NameError: name 'gof' is not defined

plt.hist(np.log10(gof) );

from sklearn.model_selection import train_test_split

x, y, yerr = emu.x, emu.y, emu.yerr downsample_idxs = np.random.choice(x.shape[0], size = int(0.08*x.shape[0]), replace = False) x,y, yerr = x[downsample_idxs, :], y[downsample_idxs], yerr[downsample_idxs] train_x, test_x, train_y, test_y, train_yerr, test_yerr = train_test_split(x, y, yerr, test_size = 0.1)



In [ ]:

    
n_cosmo_params = 7
loo_cosmo = emu.x[0, 0,  :n_cosmo_params]

loo_cosmo_idxs = np.all(emu.x[:, :,:n_cosmo_params] == loo_cosmo, axis =2)
train_x, train_y, train_yerr = emu.x[~loo_cosmo_idxs, :], emu.y[ ~loo_cosmo_idxs], emu.yerr[ ~loo_cosmo_idxs]
test_x, test_y, test_yerr = emu.x[loo_cosmo_idxs, :], emu.y[loo_cosmo_idxs], emu.yerr[loo_cosmo_idxs]

train_x, train_y, train_yerr = emu.x, emu.y, emu.yerr



In [ ]:

    
model = emu._emulator
model.compute(train_x, train_yerr)

test_x, test_y, test_yerr, _ = emu.get_data(test_file,fixed_params, None)



In [ ]:

    
pred_y = model.predict(train_y, test_x, False, False, False)*emu._y_std + emu._y_mean



In [ ]:

    
np.mean(np.abs((pred_y-test_y)/test_y))
#np.mean(np.abs((pred_y-train_y)/train_y))

for idx in xrange(50): plt.plot(emu.scale_bin_centers, ypred[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'Emu') plt.plot(emu.scale_bin_centers, emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'True') plt.title(np.sum(emu.x[(idx+1)*emu.n_bins, :-1]) ) plt.legend(loc='best') plt.xscale('log') plt.show()



In [ ]:

    
queue_skipper: True
        system: sherlock
        n_jobs: 400
        max_time: 6
resids = np.abs(emu.y*emu._y_std+emu._y_mean - ypred)



In [ ]:

    
np.mean(resids/(emu.y*emu._y_std+emu._y_mean))



In [ ]:

    
ypred.mean(), emu._y_mean

plt.plot(emu.scale_bin_centers, np.abs(gof.mean(axis = 0)) ) plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01) plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.05) plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.1) plt.loglog();

plt.plot(emu.scale_bin_centers, np.abs(gof.T),alpha = 0.1, color = 'b') plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01, lw = 2, color = 'k') plt.loglog();



In [ ]:

    
test_gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
print test_gof.mean()



In [ ]:

    
test_gof = emu.goodness_of_fit(test_file, statistic = 'frac')
print test_gof.mean()



In [ ]:

    
plt.hist(np.log10(test_gof));



In [ ]:

    
test_x



In [ ]:

    
(emu.x*emu._x_std) + emu._x_mean



In [ ]:

    
emu.get_param_names()



In [ ]:

    
test_x_white, test_y_white = (test_x - emu._x_mean)/(emu._x_std + 1e-5), (test_y - emu._y_mean)/(emu._y_std + 1e-5)



In [ ]:

    
model = emu._emulator



In [ ]:

    
pred_y_white = model.predict(emu.y, test_x_white, False, False, False)



In [ ]:

    
pred_y = pred_y_white*emu._y_std + emu._y_mean



In [ ]:

    
plt.plot(pred_y[:100], label = 'pred')
plt.plot(test_y[:100], label = 'truth')

plt.legend(loc = 'best')



In [ ]:

    
test_y.mean(), emu._y_mean, pred_y.mean()



In [ ]:

    
test_y.std(), emu._y_std, pred_y.std()



In [ ]:

    
plt.hist(pred_y_white, bins = np.linspace(-3, 3, 100), label = 'Pred')
plt.hist(test_y_white, bins = np.linspace(-3, 3, 100), label = 'Test', alpha = 0.4);
plt.legend(loc = 'best')



In [ ]:



In [ ]: